home *** CD-ROM | disk | FTP | other *** search
/ Developer CD Series 2000 November: Tool Chest / Dev.CD Nov 00 TC Disk 1.toast / Sample Code / Contributed / SpriteWorld / SpriteWorld Files / BlitPixie / Sources / BlitPixieDoubled.c < prev    next >
Encoding:
Text File  |  2000-10-06  |  11.7 KB  |  427 lines  |  [TEXT/CWIE]

  1. ///--------------------------------------------------------------------------------------
  2. //    BlitPixieDoubled
  3. //            a fast pixel-doubling blitter
  4. //
  5. //    written by Anders F Björklund <afb@algonet.se>
  6. //    ©1999 afb.
  7. ///--------------------------------------------------------------------------------------
  8.  
  9. #ifndef __BLITPIXIE__
  10. #include "BlitPixieHeader.h"
  11. #endif
  12.  
  13. #include "BlitPixieAsm.h"
  14.  
  15. #pragma mark *** PowerPC asm:
  16. #if USE_PPC_ASSEMBLY
  17.  
  // BlitPixieDoubled8Bit (PowerPC assembly version)
  //
  // Copies a rectangle of 8-bit pixels from source to destination at twice the
  // size: every source pixel is written as a 2x2 block of identical pixels.
  //
  //   source, destination   top-left byte of the source / destination area
  //   srcRowBytes           bytes from one source row to the next
  //   dstRowBytes           bytes from one destination row to the next
  //   width, height         size of the source area in pixels
  //
  // The code uses the six arguments directly from r3-r8 (see the #defines
  // below). r9 walks the second destination row; r0, r10, r11 and fp0 are
  // scratch (r_temp1/r_temp2 are defined but the code names r10/r11 directly).
  // The 8 bytes just below SP are used as scratch memory to move two integer
  // words into fp0, so that 8 destination pixels go out with one stfdu.
  //
  // Each row is consumed one 32-bit word (4 source pixels) at a time, and the
  // inner loop is software-pipelined: it stores the previous doubled word while
  // expanding the next one.
  //
  // NOTE(review): only width >> 2 words are copied per row, but the strides are
  // computed from the full width - presumably width must be a multiple of 4;
  // confirm against callers.
  // NOTE(review): the inner loop is entered by fall-through with
  // CTR = (width >> 2) - 1, so for width < 8 CTR starts at 0 and bdnz would
  // wrap - looks like callers must pass width >= 8; confirm.
  // NOTE(review): height == 0 is not checked before the first row is copied.
  18. ASM_FUNC void BlitPixieDoubled8Bit(
  19.     register unsigned char *source,        register unsigned char *destination,    
  20.     register unsigned long srcRowBytes,    register unsigned long dstRowBytes,
  21.     register unsigned short width,        register unsigned short height)
  22. {
  23.     #define    r_src                r3
  24.     #define    r_dst                r4
  25.     #define    r_srcRowBytes        r5
  26.     #define    r_dstRowBytes        r6
  27.     #define    r_width                r7
  28.     #define    r_height            r8
  29.  
  30.     #define    r_dst2                r9
  31.     #define    r_temp1                r10
  32.     #define    r_temp2                r11
  33.     
  34.     ASM_BEGIN
  35.     
  36.     subi    r_dst,r_dst,8                            // pre-bias: stfdu adds 8 before each store
  37.     add        r_dst2,r_dst,r_dstRowBytes                // r_dst2 = destination row below r_dst (same bias)
  38.     sub        r_srcRowBytes,r_srcRowBytes,r_width        // subtract width from rowbytes for stride
  39.     sub        r_dstRowBytes,r_dstRowBytes,r_width        // subtract width from rowbytes for stride
  40.     add        r_dstRowBytes,r_dstRowBytes,r_dstRowBytes    // double it: two dest rows (2*width bytes each) per source row
  41.     rlwinm    r_width,r_width,32-2,2,31                // r_width = width >> 2 (words of 4 pixels per row)
  42.     subi    r_src,r_src,4                            // pre-bias: lwzu adds 4 before each load
  43.     subi    r_width,r_width,1                        // first word of each row is loaded outside the inner loop
  44.  
  45. @rowloop:
  46.     lwzu     r0,4(r_src)                            // load 4 pixels into r0
  47.  
  48.      /*    Pixel building process:
  49.          r0:        ABCD            
  50.          r10:                A***    AAB*    AABB        AABB****
  51.          r11:                ***D    *CDD    CCDD        ****CCDD
  52.          fp0:                                            AABBCCDD
  53.      */
  54.  
  55.      mr       r10,r0                                // put copy in r10
  56.     mr       r11,r0                               // and in r11
  57.  
  58.     rlwimi   r10,r0,24,8,23                       // r10: ABCD -> AABD (A,B inserted into the middle bytes)
  59.     rlwimi   r11,r0,8,8,23                      // r11: ABCD -> ACDD (C,D inserted into the middle bytes)
  60.  
  61.     rlwimi   r10,r0,16,24,31                      // r10: AABD -> AABB
  62.     stw      r10,-8(SP)                            // store upper 4 pixels into part of double
  63.  
  64.     rlwimi   r11,r0,16,0,7                        // r11: ACDD -> CCDD
  65.     stw      r11,-4(SP)                            // store lower 4 pixels into part of double
  66.  
  67.     mtctr    r_width                            // inner loop count = words per row - 1
  68.     lfd      fp0,-8(SP)                         // load the 8 doubled pixels into fp0
  69.  
  70.     @loop:
  71.         lwzu     r0,4(r_src)                    // load next 4 pixels into r0
  72.         stfdu     fp0,8(r_dst)                    // store previous doubled word to the upper dest row
  73.  
  74.     //    (same pixel building as above)
  75.         mr       r10,r0                            // put copy in r10
  76.         mr       r11,r0                           // and in r11
  77.         rlwimi   r10,r0,24,8,23                 // r10: ABCD -> AABD
  78.         rlwimi   r11,r0,8,8,23                  // r11: ABCD -> ACDD
  79.         rlwimi   r10,r0,16,24,31                  // r10: AABD -> AABB
  80.         stw      r10,-8(SP)                        // store upper 4 pixels into part of double
  81.         rlwimi   r11,r0,16,0,7                    // r11: ACDD -> CCDD
  82.         stw      r11,-4(SP)                        // store lower 4 pixels into part of double
  83.  
  84.         stfdu    fp0,8(r_dst2)                    // store previous doubled word to the lower dest row
  85.         lfd        fp0,-8(SP)                        // load the newly built double
  86.  
  87.         bdnz+    @loop                            // loop over all x
  88.         
  89.     stfdu    fp0,8(r_dst)                        // flush the last doubled word of the row (upper dest row)
  90.     
  91.     subic.    r_height,r_height,1                    // one source row done; sets CR0 for the bne below
  92.     add        r_src,r_src,r_srcRowBytes            // skip to the start of the next source row
  93.     add        r_dst,r_dst,r_dstRowBytes            // skip two destination rows ahead
  94.     
  95.     stfdu    fp0,8(r_dst2)                        // flush the last doubled word of the row (lower dest row)
  96.     add        r_dst2,r_dst2,r_dstRowBytes            // skip two destination rows ahead
  97.     
  98.     bne        @rowloop                            // loop over all y
  99.  
  100.     ASM_END
  101. }
  102.  
  103. ASM_FUNC void BlitPixieDoubled16Bit(
  104.     register unsigned short *source,    register unsigned short *destination,    
  105.     register unsigned long srcRowBytes,    register unsigned long dstRowBytes,
  106.     register unsigned short width,        register unsigned short height)
  107. {
  108.     #define    r_src                r3
  109.     #define    r_dst                r4
  110.     #define    r_srcRowBytes        r5
  111.     #define    r_dstRowBytes        r6
  112.     #define    r_width                r7
  113.     #define    r_height            r8
  114.  
  115.     #define    r_dst2                r9
  116.     #define    r_temp1                r10
  117.     #define    r_temp2                r11
  118.     
  119.     ASM_BEGIN
  120.     
  121.     subi    r_dst,r_dst,8
  122.     add        r_dst2,r_dst,r_dstRowBytes
  123.     sub        r_srcRowBytes,r_srcRowBytes,r_width        // subtract 2*width from rowbytes for stride
  124.     sub        r_srcRowBytes,r_srcRowBytes,r_width
  125.     sub        r_dstRowBytes,r_dstRowBytes,r_width        // subtract 2*width from rowbytes for stride
  126.     sub        r_srcRowBytes,r_srcRowBytes,r_width
  127.     add        r_dstRowBytes,r_dstRowBytes,r_dstRowBytes
  128.     rlwinm    r_width,r_width,32-2,2,31
  129.     subi    r_src,r_src,4
  130.     subi    r_width,r_width,1
  131.  
  132. @rowloop:
  133.     lwzu     r0,4(r_src)                            // load 2 pixels into r0
  134.  
  135.      /*    Pixel building process:
  136.          r0:        AABB            
  137.          r10:                AA**    AAAA               AAAA****
  138.          r11:                **BB    BBBB            ****BBBB
  139.          fp0:                                        AAAABBBB
  140.      */
  141.  
  142.      mr       r10,r0                                // put copy in r10
  143.     mr       r11,r0                               // and in r11
  144.  
  145.     rlwimi   r10,r0,16,16,31                       // copy upper 16 bits to lower of r10
  146.     rlwimi   r11,r0,16,0,15                      // copy lower 16 bits to upper of r11
  147.  
  148.     stw      r10,-8(SP)                            // store upper 4 pixels into part of double
  149.     stw      r11,-4(SP)                            // store lower 4 pixels into part of double
  150.  
  151.     mtctr    r_width                            // copy width into counter
  152.     lfd      fp0,-8(SP)                         // load double
  153.  
  154.     @loop:
  155.         lwzu     r0,4(r_src)                    // load 2 pixels into r10
  156.         stfdu     fp0,8(r_dst)                    // store a double from before
  157.  
  158.     //    (same as above)
  159.         mr       r10,r0                            // put copy in r10
  160.         mr       r11,r0                           // and in r11
  161.         rlwimi   r10,r0,16,16,31                       // copy upper 16 bits to lower of r10
  162.         rlwimi   r11,r0,16,0,15                      // copy lower 16 bits to upper of r11
  163.         stw      r10,-8(SP)                        // store upper 4 pixels into part of double
  164.         stw      r11,-4(SP)                        // store lower 4 pixels into part of double
  165.  
  166.         stfdu    fp0,8(r_dst2)                    // store a double from before
  167.         lfd        fp0,-8(SP)                        // load double
  168.  
  169.         bdnz+    @loop                            // loop over all x
  170.         
  171.     stfdu    fp0,8(r_dst)
  172.     
  173.     subic.    r_height,r_height,1    
  174.     add        r_src,r_src,r_srcRowBytes    
  175.     add        r_dst,r_dst,r_dstRowBytes    
  176.     
  177.     stfdu    fp0,8(r_dst2)                        // store a double from before
  178.     add        r_dst2,r_dst2,r_dstRowBytes    
  179.     
  180.     bne        @rowloop                            // loop over all y
  181.  
  182.     ASM_END
  183. }
  184.  
  185. #pragma mark *** 680x0 asm:
  186. #elif USE_68K_ASSEMBLY
  187.  
  /*
   * BlitPixieDoubled8Bit (680x0 assembly version)
   *
   * Copies a rectangle of 8-bit pixels from source to destination at twice the
   * size: every source pixel is written as a 2x2 block of identical pixels.
   *
   *   source, destination   top-left byte of the source / destination area
   *   srcBytes, dstBytes    bytes from one row to the next (source / destination)
   *   width, height         size of the source area in pixels
   *
   * Registers: A0 = source, A1 = upper destination row, A2 = lower destination
   * row; D3/D4 = row strides; D5 = per-row loop count; D6 = row count in the
   * high word and the running x count in the low word; D0/D1/D7 = scratch.
   * D3-D7/A2 are saved on entry and restored on exit.
   *
   * The width is rounded down to a multiple of 4 and each inner-loop pass
   * turns 4 source pixels into 8 pixels on each of two destination rows.
   *
   * NOTE(review): the two MOVEM.L loads read consecutive longwords starting at
   * `source` and `srcBytes` - presumably the compiler lays out all six
   * arguments contiguously as 32-bit values; confirm against the calling
   * convention in use.
   * NOTE(review): if width < 4 then D_x is 0 and the SUBQ.W/BNE.S inner loop
   * would wrap instead of being skipped - looks like callers must pass
   * width >= 4; confirm.
   */
  188. ASM_FUNC void BlitPixieDoubled8Bit(
  189.     unsigned char *source,            unsigned char *destination,
  190.     unsigned long srcBytes,            unsigned long dstBytes,
  191.     unsigned short width,            unsigned short height )
  192. {
  193.     #define     A_src                              A0
  194.     #define  A_dst                              A1
  195.     #define  A_dst2                              A2
  196.  
  197.     #define     D_srcRowBytes                      D3
  198.     #define  D_dstRowBytes                      D4
  199.     #define  D_x                              D5
  200.     #define  D_y                              D6
  201.  
  202.     #define  D_temp1                          D0
  203.     #define  D_temp2                          D1
  204.     #define  D_pixel                          D7
  205.  
  206.     ASM_BEGIN
  207.     MOVEM.L        D3-D7/A2,-(SP)        /* save the registers used below */
  208.  
  209.     MOVEM.L        source,A0-A1        /* A_src, A_dst = two longwords starting at source */
  210.     MOVEM.L        srcBytes,D3-D6        /* strides, D_x, D_y = four longwords starting at srcBytes */
  211.  
  212.     ANDI.W        #~3,D_x            /* round width down to a multiple of 4 */
  213.     
  214.     MOVEA.L        A_dst,A_dst2        /* A_dst2 = destination row below A_dst */
  215.     ADDA.L        D_dstRowBytes,A_dst2
  216.     
  217.     SUB.L        D_x,D_srcRowBytes        /* source stride = rowbytes - width */
  218.     SUB.L        D_x,D_dstRowBytes        /* dest stride = 2 * (rowbytes - width), see next line */
  219.     ADD.L       D_dstRowBytes,D_dstRowBytes        /* two rows in dest per loop */
  220.     LSR.W       #2,D_x                        /* four pixels per loop */
  221.  
  222.     SWAP        D_y    /* cleverly use hi-word for y count (was out of registers) */
  223.         
  224. @NextRow:
  225.  
  226.         MOVE.W        D_x,D_y        /* using 'y' for both x/y counts */
  227.  
  228.     @NextPixels:
  229.  
  230.             MOVE.L    (A_src)+, D_temp1    /* get the four source pixels: ABCD */
  231.  
  232.             MOVE.L    D_temp1,D_temp2        /* make a copy */
  233.                 
  234.                 LSR.L    #8,D_temp1        /* "space out" two ...        ABCD -> 0ABC */
  235.                 LSR.W    #8,D_temp1        /* ... adjacent pixels        0ABC -> 0A0B */
  236.                 MOVE.L    D_temp1,D_pixel        /* copy "out spaced" pixels */
  237.                 LSL.L    #8,D_pixel        /* shift the copy over ...    0A0B -> A0B0 */
  238.                 OR.L    D_temp1,D_pixel        /* ... and recombine          AABB */
  239.             
  240.                 MOVE.L    D_pixel,(A_dst)+    /* write the first two source pixels, doubled */
  241.                 MOVE.L    D_pixel,(A_dst2)+    /* and write to the next row too */
  242.             
  243.             SWAP    D_temp2            /* do next two pixels: ABCD -> CDAB */
  244.  
  245.                 LSR.L    #8,D_temp2        /* repeat, as above: CDAB -> 0CDA */
  246.                 LSR.W    #8,D_temp2        /* 0CDA -> 0C0D */
  247.                 MOVE.L    D_temp2,D_pixel
  248.                 LSL.L    #8,D_pixel        /* 0C0D -> C0D0 */
  249.                 OR.L    D_temp2,D_pixel        /* CCDD */
  250.             
  251.                 MOVE.L    D_pixel,(A_dst)+    /* write the last two source pixels, doubled */
  252.                 MOVE.L    D_pixel,(A_dst2)+    /* and write to the next row too */
  253.  
  254.             SUBQ.W        #1, D_y            /* count down the low word (x count) only */
  255.             BNE.S        @NextPixels
  256.  
  257.         ADDA.L    D_srcRowBytes,A_src        /* bump to next row */
  258.         ADDA.L    D_dstRowBytes,A_dst        /* both dest pointers skip two rows ahead */
  259.         ADDA.L    D_dstRowBytes,A_dst2
  260.     
  261.         SUB.L    #0x00010000, D_y        /* decrement the row count in the high word (low word is 0 here) */
  262.         BNE.S    @NextRow
  263.  
  264.     MOVEM.L        (SP)+,D3-D7/A2        /* restore the saved registers */
  265.     ASM_END
  266. }
  267.  
  268. ASM_FUNC void BlitPixieDoubled16Bit(
  269.     unsigned short *source,            unsigned short *destination,
  270.     unsigned long srcBytes,            unsigned long dstBytes,
  271.     unsigned short width,            unsigned short height )
  272. {
  273.     #define     A_src                              A0
  274.     #define  A_dst                              A1
  275.     #define  A_dst2                              A2
  276.  
  277.     #define     D_srcRowBytes                      D3
  278.     #define  D_dstRowBytes                      D4
  279.     #define  D_x                               D5
  280.     #define  D_y                              D6
  281.  
  282.     #define  D_temp1                          D0
  283.     #define  D_temp2                          D1
  284.     #define  D_pixel                          D7
  285.  
  286.     ASM_BEGIN
  287.     MOVEM.L        D3-D7/A2,-(SP)
  288.  
  289.     MOVEM.L        source,A0-A1
  290.     MOVEM.L        srcBytes,D3-D6
  291.  
  292.     ANDI.W        #~3,D_x
  293.     
  294.     MOVEA.L        A_dst,A_dst2
  295.     ADDA.L        D_dstRowBytes,A_dst2
  296.     
  297.     SUB.L        D_x,D_srcRowBytes
  298.     SUB.L        D_x,D_srcRowBytes
  299.     SUB.L        D_x,D_dstRowBytes
  300.     SUB.L        D_x,D_dstRowBytes
  301.     ADD.L       D_dstRowBytes,D_dstRowBytes        /* two rows in dest per loop */
  302.     LSR.W       #1,D_x                        /* two pixels per loop */
  303.  
  304.     SWAP        D_y    /* cleverly use hi-word for y count (was out of registers) */
  305.         
  306. @NextRow:
  307.  
  308.         MOVE.W        D_x,D_y        /* using 'y' for both x/y counts */
  309.  
  310.     @NextPixels:
  311.  
  312.             MOVE.L    (A_src)+, D_temp1    /* get the two source pixels */
  313.  
  314.             MOVE.L    D_temp1,D_temp2        /* make a copy */
  315.                 
  316.                 SWAP    D_temp1
  317.                 MOVE.W    D_temp1,D_pixel
  318.                 SWAP    D_pixel
  319.                 MOVE.W    D_temp1,D_pixel
  320.             
  321.                 MOVE.L    D_pixel,(A_dst)+    /* write the first two pixels */
  322.                 MOVE.L    D_pixel,(A_dst2)+    /* and write to the next row too */
  323.             
  324.             SWAP    D_temp2            /* do next two pixels */
  325.  
  326.                 MOVE.W    D_temp2,D_pixel
  327.                 SWAP    D_pixel
  328.                 MOVE.W    D_temp2,D_pixel
  329.             
  330.                 MOVE.L    D_pixel,(A_dst)+    /* write the last two pixels */
  331.                 MOVE.L    D_pixel,(A_dst2)+    /* and write to the next row too */
  332.  
  333.             SUBQ.W        #1, D_y
  334.             BNE.S        @NextPixels
  335.  
  336.         ADDA.L    D_srcRowBytes,A_src        /* bump to next row */
  337.         ADDA.L    D_dstRowBytes,A_dst
  338.         ADDA.L    D_dstRowBytes,A_dst2
  339.     
  340.         SUB.L    #0x00010000, D_y
  341.         BNE.S    @NextRow
  342.  
  343.     MOVEM.L        (SP)+,D3-D7/A2
  344.     ASM_END
  345. }
  346.  
  347. #pragma mark *** Generic C:
  348. #elif USE_GENERIC_C
  349.  
  /*
   * BlitPixieDoubled8Bit (generic C version)
   *
   * Copies a width x height rectangle of 8-bit pixels from source to
   * destination at twice the size: every source pixel becomes a 2x2 block.
   *
   *   source, destination   top-left byte of the source / destination area
   *   srcRowBytes           bytes from one source row to the next
   *   dstRowBytes           bytes from one destination row to the next
   *   width, height         size of the source area in pixels
   *
   * The destination must have room for 2*width pixels on 2*height rows.
   */
  void BlitPixieDoubled8Bit(
      unsigned char *source,            unsigned char *destination,
      unsigned long srcRowBytes,        unsigned long dstRowBytes,
      unsigned short width,            unsigned short height)
  {
      unsigned long row;

      for ( row = 0; row < height; row++ )
      {
          /* One source row feeds two consecutive destination rows. */
          unsigned char *srcLine = source + row * srcRowBytes;
          unsigned char *upperLine = destination + (2 * row) * dstRowBytes;
          unsigned char *lowerLine = destination + (2 * row + 1) * dstRowBytes;
          unsigned long col;

          for ( col = 0; col < width; col++ )
          {
              unsigned char pixel = srcLine[ col ];
              unsigned long left = 2 * col;

              upperLine[ left ] = pixel;
              upperLine[ left + 1 ] = pixel;
              lowerLine[ left ] = pixel;
              lowerLine[ left + 1 ] = pixel;
          }
      }
  }
  372.  
  /*
   * BlitPixieDoubled16Bit (generic C version)
   *
   * Copies a width x height rectangle of 16-bit pixels from source to
   * destination at twice the size: every source pixel becomes a 2x2 block.
   *
   *   source, destination   top-left pixel of the source / destination area
   *   srcRowBytes           BYTES from one source row to the next
   *   dstRowBytes           BYTES from one destination row to the next
   *   width, height         size of the source area in pixels
   *
   * The row strides are byte counts, so rows are located with byte-pointer
   * arithmetic and only then addressed as 16-bit pixels. (Indexing the pixel
   * arrays directly with the byte strides would step twice as far as intended.)
   * Both strides are assumed to be multiples of the pixel size.
   *
   * The destination must have room for 2*width pixels on 2*height rows.
   */
  void BlitPixieDoubled16Bit(
      unsigned short *source,            unsigned short *destination,
      unsigned long srcRowBytes,        unsigned long dstRowBytes,
      unsigned short width,            unsigned short height)
  {
      unsigned char    *srcBase = (unsigned char *) source;
      unsigned char    *dstBase = (unsigned char *) destination;
      unsigned long    x, y;

      for ( y = 0; y < height; y++ )
      {
          /* One source row feeds two consecutive destination rows. */
          unsigned short *srcRow  = (unsigned short *) ( srcBase + y * srcRowBytes );
          unsigned short *dstRow1 = (unsigned short *) ( dstBase + ( 2 * y ) * dstRowBytes );
          unsigned short *dstRow2 = (unsigned short *) ( dstBase + ( 2 * y + 1 ) * dstRowBytes );

          for ( x = 0; x < width; x++ )
          {
              unsigned short c = srcRow[ x ];

              dstRow1[ 2 * x ] = c;
              dstRow1[ 2 * x + 1 ] = c;
              dstRow2[ 2 * x ] = c;
              dstRow2[ 2 * x + 1 ] = c;
          }
      }
  }
  395.  
  396. #endif
  397.  
  398. #pragma mark -
  399.  
  400. #ifndef GENERATINGASM // do not include for asm file generation
  401.  
  /*
   * BlitPixieDoubled32Bit (generic C version, used on every target)
   *
   * Copies a width x height rectangle of pixels, one unsigned long each, from
   * source to destination at twice the size: every source pixel becomes a
   * 2x2 block.
   *
   *   source, destination   top-left pixel of the source / destination area
   *   srcRowBytes           BYTES from one source row to the next
   *   dstRowBytes           BYTES from one destination row to the next
   *   width, height         size of the source area in pixels
   *
   * The row strides are byte counts, so rows are located with byte-pointer
   * arithmetic and only then addressed as pixels. (Indexing the pixel arrays
   * directly with the byte strides would step several rows too far.)
   * Both strides are assumed to be multiples of the pixel size.
   *
   * The destination must have room for 2*width pixels on 2*height rows.
   */
  void BlitPixieDoubled32Bit(
      unsigned long *source,            unsigned long *destination,
      unsigned long srcRowBytes,        unsigned long dstRowBytes,
      unsigned short width,            unsigned short height)
  {
      unsigned char    *srcBase = (unsigned char *) source;
      unsigned char    *dstBase = (unsigned char *) destination;
      unsigned long    x, y;

      for ( y = 0; y < height; y++ )
      {
          /* One source row feeds two consecutive destination rows. */
          unsigned long *srcRow  = (unsigned long *) ( srcBase + y * srcRowBytes );
          unsigned long *dstRow1 = (unsigned long *) ( dstBase + ( 2 * y ) * dstRowBytes );
          unsigned long *dstRow2 = (unsigned long *) ( dstBase + ( 2 * y + 1 ) * dstRowBytes );

          for ( x = 0; x < width; x++ )
          {
              unsigned long c = srcRow[ x ];

              dstRow1[ 2 * x ] = c;
              dstRow1[ 2 * x + 1 ] = c;
              dstRow2[ 2 * x ] = c;
              dstRow2[ 2 * x + 1 ] = c;
          }
      }
  }
  424.  
  425. #endif
  426.  
  427.